import re

import pandas as pd

if __name__ == '__main__':

    # Workbook that holds the two address columns to be compared.
    source_path = r"D:\Download\WeChat Files\wxid_kdchbeq2xllp22\FileStorage\File\2025-05\通州对比结果.xlsx"
    df = pd.read_excel(source_path)


    def t(address):
        """Split an address string into building, unit, floor and room parts.

        Every component is located with its own regular expression, so any of
        them may be absent from the address.

        Args:
            address: Address text to parse. Any value that is not a ``str``
                (``None``, a float ``NaN`` coming from pandas, ...) is treated
                as a missing address.

        Returns:
            A four-element ``pd.Series`` built from
            ``[building, unit, floor, room]``, where a component that was not
            found is passed as ``None``.
        """
        # pandas stores missing cells as float NaN rather than None, and
        # re.search() raises TypeError on non-strings, so guard on the type.
        if not isinstance(address, str):
            return pd.Series([None, None, None, None])

        # Building patterns, tried in this order; the first hit wins.
        building_patterns = (
            r'([a-zA-Z0-9甲乙丙丁]+)(?:号楼|#楼|#|楼|座|栋|幢)',
            r'-([a-zA-Z0-9甲乙丙丁]+)-\d+单元',
            r'([东南西北])侧楼',
        )
        # Last-resort building pattern: a bare "<digits>号".
        bare_number_pattern = r'(\d+)号'
        unit_pattern = r'([a-zA-Z0-9]+|东|西|南|北)(?:单元|单-)'
        floor_pattern = r'(\d+)层'
        room_pattern = r'([a-zA-Z0-9]+)(?:号)?室'

        unit_match = re.search(unit_pattern, address)
        floor_match = re.search(floor_pattern, address)
        room_match = re.search(room_pattern, address)

        building_match = None
        for pattern in building_patterns:
            building_match = re.search(pattern, address)
            if building_match is not None:
                break

        if building_match is None:
            building_match = re.search(bare_number_pattern, address)
            # A "<digits>号" that starts exactly where the room match starts
            # is the room number written as "<digits>号室", not a building.
            # Only discard it when a room was actually matched; otherwise a
            # building number at the very start of the address would be lost.
            if (
                building_match is not None
                and room_match is not None
                and building_match.start() == room_match.start()
            ):
                building_match = None

        building = building_match.group(1) if building_match else None
        unit = unit_match.group(1) if unit_match else None
        floor = floor_match.group(1) if floor_match else None
        room = room_match.group(1) if room_match else None
        return pd.Series([building, unit, floor, room])


    # Parsed components of '装机地址' and of '地址', in matching order.
    left_cols = ['楼号', '单元号', '层号', '室号']
    right_cols = ['楼号2', '单元号2', '层号2', '室号2']

    # Parse temporary copies with '室' appended, so an address that ends in a
    # bare room number is still picked up by the room pattern in t(). The
    # original address columns are never modified, so no backup is needed.
    df[left_cols] = (df['装机地址'] + '室').astype(str).apply(t)
    df[right_cols] = (df['地址'] + '室').astype(str).apply(t)

    # pandas "==" is False whenever either side is missing, so a component
    # absent from BOTH addresses must be counted as agreement explicitly;
    # otherwise any address pair lacking e.g. a floor could never match.
    all_agree = pd.Series(True, index=df.index)
    for left, right in zip(left_cols, right_cols):
        both_missing = df[left].isna() & df[right].isna()
        all_agree &= (df[left] == df[right]) | both_missing

    # Rows where nothing at all could be parsed are not reported as matching.
    df['是否一一对应'] = all_agree & df[left_cols].notna().any(axis=1)

    # Write the result to file.
    df.to_excel("/temp/地址.xlsx", index=False)
